Replicating pipeline and results described in Galbraith et al. (2016). PNAS. https://www.pnas.org/content/113/4/1020
if(suppressWarnings(require("purrr"))==F){
install.packages("purrr")
library(purrr)
}
if(suppressWarnings(require("tidyverse"))==F){
install.packages("tidyverse")
library(tidyverse)
}
if(suppressWarnings(require("stringr"))==F){
install.packages("stringr")
library(tidyverse)
}
if(suppressWarnings(require("plyr"))==F){
install.packages("plyr")
library(plyr)
}
if(suppressWarnings(require("ggplot2"))==F){
install.packages("ggplot2")
library(ggplot2)
}
if(suppressWarnings(require("Rfast"))==F){
install.packages("Rfast")
library(Rfast)
}
if(suppressWarnings(require("tryCatchLog"))==F){
install.packages("tryCatchLog")
library(tryCatchLog)
}
if(suppressWarnings(require("lmerTest"))==F){
install.packages("lmerTest")
library(lmerTest)
}
if(suppressWarnings(require("car"))==F){
install.packages("car")
library(car)
}
if(suppressWarnings(require("viridis"))==F){
install.packages("viridis")
library(viridis)
}
if(suppressWarnings(require("gridExtra"))==F){
install.packages("gridExtra")
library(gridExtra)
}
if(suppressWarnings(require("kableExtra"))==F){
install.packages("kableExtra")
library(kableExtra)
}
metadata <- read.csv("metadata.csv")
kbl(metadata) %>% kable_styling()
| sample.id | parent | block | cross | phenotype | individual | lineage |
|---|---|---|---|---|---|---|
| X8754AQ | Q | 1 | B | reproductive | X8754 | EHB |
| X875cAQ | Q | 1 | B | reproductive | X875c | EHB |
| X875gAQ | Q | 1 | B | reproductive | X875g | EHB |
| X8820AQ | Q | 2 | B | reproductive | X8820 | EHB |
| X882cAQ | Q | 2 | B | reproductive | X882c | EHB |
| X882pAQ | Q | 2 | B | reproductive | X882p | EHB |
| X888aAQ | Q | 1 | A | reproductive | X888a | AHB |
| X888hAQ | Q | 1 | A | reproductive | X888h | AHB |
| X888jAQ | Q | 1 | A | reproductive | X888j | AHB |
| X894bAQ | Q | 2 | A | reproductive | X894b | AHB |
| X894qAQ | Q | 2 | A | reproductive | X894q | AHB |
| X894rAQ | Q | 2 | A | reproductive | X894r | AHB |
| X8751IQ | Q | 1 | B | sterile | X8751 | EHB |
| X8756IQ | Q | 1 | B | sterile | X8756 | EHB |
| X875aIQ | Q | 1 | B | sterile | X875a | EHB |
| X8827IQ | Q | 2 | B | sterile | X8827 | EHB |
| X882gIQ | Q | 2 | B | sterile | X882g | EHB |
| X882kIQ | Q | 2 | B | sterile | X882k | EHB |
| X882mIQ | Q | 2 | B | sterile | X882m | EHB |
| X888eIQ | Q | 1 | A | sterile | X888e | AHB |
| X888qIQ | Q | 1 | A | sterile | X888q | AHB |
| X888uIQ | Q | 1 | A | sterile | X888u | AHB |
| X894cIQ | Q | 2 | A | sterile | X894c | AHB |
| X894fIQ | Q | 2 | A | sterile | X894f | AHB |
| X894iIQ | Q | 2 | A | sterile | X894i | AHB |
| X8754AD | D | 1 | B | reproductive | X8754 | EHB |
| X875cAD | D | 1 | B | reproductive | X875c | EHB |
| X875gAD | D | 1 | B | reproductive | X875g | EHB |
| X8820AD | D | 2 | B | reproductive | X8820 | EHB |
| X882cAD | D | 2 | B | reproductive | X882c | EHB |
| X882pAD | D | 2 | B | reproductive | X882p | EHB |
| X888aAD | D | 1 | A | reproductive | X888a | AHB |
| X888hAD | D | 1 | A | reproductive | X888h | AHB |
| X888jAD | D | 1 | A | reproductive | X888j | AHB |
| X894bAD | D | 2 | A | reproductive | X894b | AHB |
| X894qAD | D | 2 | A | reproductive | X894q | AHB |
| X894rAD | D | 2 | A | reproductive | X894r | AHB |
| X8751ID | D | 1 | B | sterile | X8751 | EHB |
| X8756ID | D | 1 | B | sterile | X8756 | EHB |
| X875aID | D | 1 | B | sterile | X875a | EHB |
| X8827ID | D | 2 | B | sterile | X8827 | EHB |
| X882gID | D | 2 | B | sterile | X882g | EHB |
| X882kID | D | 2 | B | sterile | X882k | EHB |
| X882mID | D | 2 | B | sterile | X882m | EHB |
| X888eID | D | 1 | A | sterile | X888e | AHB |
| X888qID | D | 1 | A | sterile | X888q | AHB |
| X888uID | D | 1 | A | sterile | X888u | AHB |
| X894cID | D | 2 | A | sterile | X894c | AHB |
| X894fID | D | 2 | A | sterile | X894f | AHB |
| X894iID | D | 2 | A | sterile | X894i | AHB |
sterile_counts <- read.csv("sterile_counts.csv")
sterile_counts <- sterile_counts[!sterile_counts$ID==".",]
sterile_counts$SNP.gene <- paste(paste(sterile_counts$Chrm,
sterile_counts$SNP.pos,sep="|"),
sterile_counts$ID,sep=":")
row.names(sterile_counts) <- sterile_counts$SNP.gene
sterile_counts$SNP.gene <- NULL
sterile_counts$Chrm <- NULL
sterile_counts$SNP.pos <- NULL
sterile_counts$ID <- NULL
reproductive_counts <- read.csv("reproductive_counts.csv")
reproductive_counts <- reproductive_counts[!reproductive_counts$ID==".",]
reproductive_counts$SNP.gene <- paste(paste(reproductive_counts$Chrm,
reproductive_counts$SNP.pos,sep="|"),
reproductive_counts$ID,sep=":")
row.names(reproductive_counts) <- reproductive_counts$SNP.gene
reproductive_counts$SNP.gene <- NULL
reproductive_counts$Chrm <- NULL
reproductive_counts$SNP.pos <- NULL
reproductive_counts$ID <- NULL
lcf <- 1
### Remove rows with <lcf counts by cross
L875u <- names(sterile_counts)%in%metadata[metadata$block==1&metadata$cross=="B","sample.id"]
L888u <- names(sterile_counts)%in%metadata[metadata$block==1&metadata$cross=="A","sample.id"]
L882u <- names(sterile_counts)%in%metadata[metadata$block==2&metadata$cross=="B","sample.id"]
L894u <- names(sterile_counts)%in%metadata[metadata$block==2&metadata$cross=="A","sample.id"]
sterile_counts <- sterile_counts[rowSums(sterile_counts[L875u])>lcf,]
sterile_counts <- sterile_counts[rowSums(sterile_counts[L888u])>lcf,]
sterile_counts <- sterile_counts[rowSums(sterile_counts[L882u])>lcf,]
sterile_counts <- sterile_counts[rowSums(sterile_counts[L894u])>lcf,]
rm(L875u,L888u,L882u,L894u)
L875r <- names(reproductive_counts)%in%metadata[metadata$block==1&metadata$cross=="B","sample.id"]
L888r <- names(reproductive_counts)%in%metadata[metadata$block==1&metadata$cross=="A","sample.id"]
L882r <- names(reproductive_counts)%in%metadata[metadata$block==2&metadata$cross=="B","sample.id"]
L894r <- names(reproductive_counts)%in%metadata[metadata$block==2&metadata$cross=="A","sample.id"]
reproductive_counts <- reproductive_counts[rowSums(reproductive_counts[L875r])>lcf,]
reproductive_counts <- reproductive_counts[rowSums(reproductive_counts[L888r])>lcf,]
reproductive_counts <- reproductive_counts[rowSums(reproductive_counts[L882r])>lcf,]
reproductive_counts <- reproductive_counts[rowSums(reproductive_counts[L894r])>lcf,]
rm(L875r,L888r,L882r,L894r)
### Remove rows with greater than 10000 counts (cannot run SK test on these)
sterile_counts$SUM <- rowSums(sterile_counts)
sterile_counts <- sterile_counts[sterile_counts$SUM<10000,]
sterile_counts$SUM <- NULL
reproductive_counts$SUM <- rowSums(reproductive_counts)
reproductive_counts <- reproductive_counts[reproductive_counts$SUM<10000,]
reproductive_counts$SUM <- NULL
sterile.pat <- sterile_counts[,metadata[metadata$parent%in%c("D")&
metadata$phenotype=="sterile","sample.id"]]
sterile.mat <- sterile_counts[,metadata[metadata$parent%in%c("Q")&
metadata$phenotype=="sterile","sample.id"]]
reproductive.pat <- reproductive_counts[,metadata[metadata$parent%in%c("D")&
metadata$phenotype=="reproductive","sample.id"]]
reproductive.mat <- reproductive_counts[,metadata[metadata$parent%in%c("Q")&
metadata$phenotype=="reproductive","sample.id"]]
twobinom](Function from the WRS2 package). Test the hypothesis that two independent binomials have equal probability of success using the Storer–Kim method.
twobinom<-function(r1=sum(elimna(x)),n1=length(x),
r2=sum(elimna(y)),n2=length(y),
x=NA,y=NA,alpha=.05){
# Modified for efficiency: changed outer() command to Rfast::Outer()
n1p<-n1+1
n2p<-n2+1
n1m<-n1-1
n2m<-n2-1
q <- r1/n1
p <- r2/n2
if(is.na(q)){q <- 0}
if(is.na(p)){p <- 0}
chk<-abs(q-p)
x<-c(0:n1)/n1
suppressWarnings(if(is.na(x)){x <- 0})
y<-c(0:n2)/n2
suppressWarnings(if(is.na(y)){y <- 0})
phat<-(r1+r2)/(n1+n2)
m1<-t(Outer(x,y,"-"))
m2<-matrix(1,n1p,n2p)
flag<-(abs(m1)>=chk)
m3<-m2*flag
rm(m1,m2,flag)
xv<-c(1:n1)
yv<-c(1:n2)
xv1<-n1-xv+1
yv1<-n2-yv+1
dis1<-c(1,pbeta(phat,xv,xv1))
dis2<-c(1,pbeta(phat,yv,yv1))
pd1<-NA
pd2<-NA
for(i in 1:n1){pd1[i]<-dis1[i]-dis1[i+1]}
for(i in 1:n2){pd2[i]<-dis2[i]-dis2[i+1]}
pd1[n1p]<-phat^n1
pd2[n2p]<-phat^n2
m4<-t(Outer(pd1,pd2,"*"))
test<-sum(m3*m4)
rm(m3,m4)
list(p.value=test,p1=q,p2=p,est.dif=q-p)
}
test.df]test.df <- function(pat.exp,mat.exp){
i.len <- length(row.names(pat.exp))
return.list <- list()
for(i in 1:i.len){
print(paste("Row",i,"of",i.len,sep=" "))
SNP_gene <- row.names(pat.exp[i,])
p1.s <- sum(pat.exp[i,])
p2.s <- sum(mat.exp[i,])
p.o <- sum(p1.s,p2.s)
test <- twobinom(r1=p1.s,n1=p.o,r2=p2.s,n2=p.o)$p.value
return.df <- data.frame(SNP_gene=SNP_gene,p=test)
return.list[[i]] <- return.df
cat("\014")
}
return(bind_rows(return.list))
}
GLIMMIX]GLIMMIX <- function(counts,metadata){
# Requires tryCatchLog package!
counts$SNP_gene <- row.names(counts)
parent.p.list <- list()
cross.p.list <- list()
parent.cross.p.list <- list()
counts$geneID <- as.character(unlist(map(strsplit(counts$SNP_gene, split = ":"), 2)))
genelist <- unique(counts$geneID)
for(i in 1:length(genelist)){
cat("\014")
print(paste("Row ",i," of ",length(genelist),sep=""))
counts.sub <- counts[counts$geneID==genelist[i],]
counts.sub$geneID <- NULL
counts.sub <- gather(counts.sub, sample.id, count,
names(counts.sub), -SNP_gene, factor_key=TRUE)
counts.sub <- join(counts.sub, metadata, by = "sample.id")
counts.sub$parent <- as.factor(str_sub(counts.sub$parent,-1,-1))
counts.sub$SNP_gene <- as.factor(counts.sub$SNP_gene)
counts.sub$lineage <- as.factor(counts.sub$lineage)
counts.sub$individual <- as.factor(counts.sub$individual)
testfail <- F
test <- "null"
if(length(levels(counts.sub$SNP_gene))>1){
tryCatchLog(test <- lmer(count~parent+lineage+parent*lineage+(1|SNP_gene)+(1|individual),
data=counts.sub),
error = function(e) {testfail <- T})
}else{
tryCatchLog(test <- lmer(count~parent+lineage+parent*lineage+(1|individual),
data=counts.sub),
error = function(e) {testfail <- T})
}
if(class(test)=="character"){testfail <- T}
if(testfail==F){
test <- summary(test)
parent.p.list[i] <- test[["coefficients"]][2,5]
cross.p.list[i] <- test[["coefficients"]][3,5]
parent.cross.p.list[i] <- test[["coefficients"]][4,5]
}else{
parent.p.list[i] <- 1
cross.p.list[i] <- 1
parent.cross.p.list[i] <- 1
}
}
return(data.frame(ID=genelist,
parent.p=unlist(parent.p.list),
cross.p=unlist(cross.p.list),
parentXcross.p=unlist(parent.cross.p.list)))
}
sterile.SK <- test.df(sterile.pat,sterile.mat)
write.csv(sterile.SK,"sterileSK.csv", row.names=F)
reproductive.SK <- test.df(reproductive.pat,reproductive.mat)
write.csv(reproductive.SK,"reproductiveSK.csv", row.names=F)
sterile.GLIMMIX <- GLIMMIX(sterile_counts,metadata)
write.csv(sterile.GLIMMIX,"sterileGLIMMIX.csv", row.names=F)
reproductive.GLIMMIX <- GLIMMIX(reproductive_counts,metadata)
write.csv(reproductive.GLIMMIX,"reproductiveGLIMMIX.csv", row.names=F)
| p | pat/mat | cross | parent |
|---|---|---|---|
| p1 | pat | B | D |
| p1 | mat | B | Q |
| p2 | pat | A | D |
| p2 | mat | A | Q |
# sterile
## p1
p1.sterile.pat <- sterile_counts[,metadata[metadata$parent%in%c("D")&
metadata$cross=="B"&
metadata$phenotype=="sterile","sample.id"]]
p1.sterile.mat <- sterile_counts[,metadata[metadata$parent%in%c("Q")&
metadata$cross=="B"&
metadata$phenotype=="sterile","sample.id"]]
## p2
p2.sterile.pat <- sterile_counts[,metadata[metadata$parent%in%c("D")&
metadata$cross=="A"&
metadata$phenotype=="sterile","sample.id"]]
p2.sterile.mat <- sterile_counts[,metadata[metadata$parent%in%c("Q")&
metadata$cross=="A"&
metadata$phenotype=="sterile","sample.id"]]
# reproductive
## p1
p1.reproductive.pat <- reproductive_counts[,metadata[metadata$parent%in%c("D")&
metadata$cross=="B"&
metadata$phenotype=="reproductive","sample.id"]]
p1.reproductive.mat <- reproductive_counts[,metadata[metadata$parent%in%c("Q")&
metadata$cross=="B"&
metadata$phenotype=="reproductive","sample.id"]]
## p2
p2.reproductive.pat <- reproductive_counts[,metadata[metadata$parent%in%c("D")&
metadata$cross=="A"&
metadata$phenotype=="reproductive","sample.id"]]
p2.reproductive.mat <- reproductive_counts[,metadata[metadata$parent%in%c("Q")&
metadata$cross=="A"&
metadata$phenotype=="reproductive","sample.id"]]
p1.sterile.plot <- data.frame(rowSums(p1.sterile.pat)/
(rowSums(p1.sterile.mat)+
rowSums(p1.sterile.pat)))
names(p1.sterile.plot) <- c("p1")
p1.sterile.plot[is.nan(p1.sterile.plot$p1),"p1"] <- 0
p2.sterile.plot <- data.frame(rowSums(p2.sterile.mat)/
(rowSums(p2.sterile.mat)+
rowSums(p2.sterile.pat)))
names(p2.sterile.plot) <- c("p2")
p2.sterile.plot[is.nan(p2.sterile.plot$p2),"p2"] <- 0
sterile.plot <- cbind(p1.sterile.plot,p2.sterile.plot)
sterile.plot <- sterile.plot[row.names(sterile.plot)%in%sterile.SK$SNP_gene,]
sterile.plot$SK.p <- sterile.SK$p
sterile.plot$SNP_gene <- row.names(sterile.plot)
sterile.plot$gene <- as.character(map(strsplit(sterile.plot$SNP_gene, split = ":"), 2))
p1.reproductive.plot <- data.frame(rowSums(p1.reproductive.pat)/
(rowSums(p1.reproductive.mat)+
rowSums(p1.reproductive.pat)))
names(p1.reproductive.plot) <- c("p1")
p1.reproductive.plot[is.nan(p1.reproductive.plot$p1),"p1"] <- 0
p2.reproductive.plot <- data.frame(rowSums(p2.reproductive.mat)/
(rowSums(p2.reproductive.mat)+
rowSums(p2.reproductive.pat)))
names(p2.reproductive.plot) <- c("p2")
p2.reproductive.plot[is.nan(p2.reproductive.plot$p2),"p2"] <- 0
reproductive.plot <- cbind(p1.reproductive.plot,p2.reproductive.plot)
reproductive.plot <- reproductive.plot[row.names(reproductive.plot)%in%reproductive.SK$SNP_gene,]
reproductive.plot$SK.p <- reproductive.SK$p
reproductive.plot$SNP_gene <- row.names(reproductive.plot)
reproductive.plot$gene <- as.character(map(strsplit(reproductive.plot$SNP_gene, split = ":"), 2))
sterile.GLIMMIX.biased <- data.frame(gene=sterile.GLIMMIX$ID,
parent.p=sterile.GLIMMIX$parent.p,
cross.p=sterile.GLIMMIX$cross.p,
parentXcross=sterile.GLIMMIX$parentXcross.p)
reproductive.GLIMMIX.biased <- data.frame(gene=reproductive.GLIMMIX$ID,
parent.p=reproductive.GLIMMIX$parent.p,
cross.p=reproductive.GLIMMIX$cross.p,
parentXcross=reproductive.GLIMMIX$parentXcross.p)
sterile.plot$SK.padj <- p.adjust(sterile.plot$SK.p,"BH")
sterile.plot$bias <- "NA"
reproductive.plot$SK.padj <- p.adjust(reproductive.plot$SK.p,"BH")
reproductive.plot$bias <- "NA"
sterile.GLIMMIX$parent.padj <- p.adjust(sterile.GLIMMIX$parent.p,"BH")
sterile.GLIMMIX$cross.padj <- p.adjust(sterile.GLIMMIX$cross.p,"BH")
reproductive.GLIMMIX$parent.padj <- p.adjust(reproductive.GLIMMIX$parent.p,"BH")
reproductive.GLIMMIX$cross.padj <- p.adjust(reproductive.GLIMMIX$cross.p,"BH")
sterile.GLIMMIX.biased <- sterile.GLIMMIX[sterile.GLIMMIX$parent.padj<0.05|sterile.GLIMMIX$cross.padj<0.05,1]
sterile.GLIMMIX.biased <- setdiff(sterile.GLIMMIX.biased,
sterile.GLIMMIX[sterile.GLIMMIX$parentXcross.padj<0.05,1])
reproductive.GLIMMIX.biased <- reproductive.GLIMMIX[reproductive.GLIMMIX$parent.padj<0.05|reproductive.GLIMMIX$cross.padj<0.05,1]
reproductive.GLIMMIX.biased <- setdiff(reproductive.GLIMMIX.biased,
reproductive.GLIMMIX[reproductive.GLIMMIX$parentXcross.padj<0.05,1])
for(i in 1:length(row.names(sterile.plot))){
p <- sterile.plot[i,"SK.padj"]
p1 <- sterile.plot[i,"p1"]
p2 <- sterile.plot[i,"p2"]
if(p<0.05&p1>0.6&p2<0.4){sterile.plot[i,"bias"] <- "pat"}
if(p<0.05&p1<0.4&p2>0.6){sterile.plot[i,"bias"] <- "mat"}
if(p<0.05&p1<0.4&p2<0.4){sterile.plot[i,"bias"] <- "EHB"}
if(p<0.05&p1>0.6&p2>0.6){sterile.plot[i,"bias"] <- "AHB"}
}
biaslist <- data.frame(matrix(ncol=2,nrow=0))
names(biaslist) <- c("gene","bias")
genelist <- unique(sterile.plot$gene)
for(i in 1:length(genelist)){
tmp <- unique(sterile.plot[sterile.plot$gene==genelist[i],"bias"])
if(length(tmp)>1){
if(length(tmp)==2){
if(any(tmp%in%"NA")){
bias <- tmp[!tmp%in%"NA"]
}
}else{
bias <- "NA"
}
}else{bias <- tmp}
biaslist <- rbind(biaslist,data.frame(gene=genelist[[i]], bias=bias))
}
sterile.plot <- sterile.plot %>%
left_join(biaslist, by = c('gene' = 'gene'))
names(sterile.plot)[c(7:8)] <- c("xbias","bias")
sterile.plot$bias.plot <- "NA"
for(i in 1:length(row.names(sterile.plot))){
p1 <- sterile.plot$p1[i]
p2 <- sterile.plot$p2[i]
bias <- sterile.plot$bias[i]
if(!bias=="NA"){
if(bias=="pat"){if(p1>0.6&p2<0.4){sterile.plot[i,"bias.plot"]<- "pat"}}
if(bias=="mat"){if(p1<0.4&p2>0.6){sterile.plot[i,"bias.plot"] <- "mat"}}
if(bias=="EHB"){if(p1<0.4&p2<0.4){sterile.plot[i,"bias.plot"] <- "EHB"}}
if(bias=="AHB"){if(p1>0.6&p2>0.6){sterile.plot[i,"bias.plot"] <- "AHB"}}
}
}
sterile.plot0 <- sterile.plot
sterile.plot[!sterile.plot$gene%in%sterile.GLIMMIX.biased,"bias"] <- "NA"
sterile.plot[!sterile.plot$gene%in%sterile.GLIMMIX.biased,"bias.plot"] <- "NA"
sterile.plot <- rbind(sterile.plot[sterile.plot$bias.plot%in%c("NA"),],
sterile.plot[sterile.plot$bias.plot%in%c("mat", "AHB", "EHB", "pat"),])
sterile.plot$bias.plot <- factor(sterile.plot$bias.plot,
levels = c("NA","mat", "AHB", "EHB", "pat"))
write.csv(sterile.plot,"sterile_PSGE.csv",row.names=F)
g1 <- ggplot(sterile.plot, aes(x=p1, y=p2,
color=bias.plot,alpha=0.8)) +
geom_point(size=1) + theme_classic() +
xlab(expression(paste("% A allele in ",EHB[mother],
" x ",AHB[father],sep=""))) +
ylab(expression(paste("% A allele in ",AHB[mother],
" x ",EHB[father],sep=""))) +
ggtitle("sterile workers") +
theme(text = element_text(size=20),
plot.title = element_text(hjust = 0.5)) +
scale_color_manual(breaks = levels(sterile.plot$bias.plot)[-c(1)],
values=c("grey90", "#039827" ,"#83756f", "#bc9a42", "#e42b37")) +
guides(alpha=F, color=F)
g1
for(i in 1:length(row.names(reproductive.plot))){
p <- reproductive.plot[i,"SK.padj"]
p1 <- reproductive.plot[i,"p1"]
p2 <- reproductive.plot[i,"p2"]
if(p<0.05&p1>0.6&p2<0.4){reproductive.plot[i,"bias"] <- "pat"}
if(p<0.05&p1<0.4&p2>0.6){reproductive.plot[i,"bias"] <- "mat"}
if(p<0.05&p1<0.4&p2<0.4){reproductive.plot[i,"bias"] <- "EHB"}
if(p<0.05&p1>0.6&p2>0.6){reproductive.plot[i,"bias"] <- "AHB"}
}
biaslist <- data.frame(matrix(ncol=2,nrow=0))
names(biaslist) <- c("gene","bias")
genelist <- unique(reproductive.plot$gene)
for(i in 1:length(genelist)){
tmp <- unique(reproductive.plot[reproductive.plot$gene==genelist[i],"bias"])
if(length(tmp)>1){
if(length(tmp)==2){
if(any(tmp%in%"NA")){
bias <- tmp[!tmp%in%"NA"]
}
}else{
bias <- "NA"
}
}else{bias <- tmp}
biaslist <- rbind(biaslist,data.frame(gene=genelist[[i]], bias=bias))
}
reproductive.plot <- reproductive.plot %>%
left_join(biaslist, by = c('gene' = 'gene'))
names(reproductive.plot)[c(7:8)] <- c("xbias","bias")
reproductive.plot$bias.plot <- "NA"
for(i in 1:length(row.names(reproductive.plot))){
p1 <- reproductive.plot$p1[i]
p2 <- reproductive.plot$p2[i]
bias <- reproductive.plot$bias[i]
if(!bias=="NA"){
if(bias=="pat"){if(p1>0.6&p2<0.4){reproductive.plot[i,"bias.plot"]<- "pat"}}
if(bias=="mat"){if(p1<0.4&p2>0.6){reproductive.plot[i,"bias.plot"] <- "mat"}}
if(bias=="EHB"){if(p1<0.4&p2<0.4){reproductive.plot[i,"bias.plot"] <- "EHB"}}
if(bias=="AHB"){if(p1>0.6&p2>0.6){reproductive.plot[i,"bias.plot"] <- "AHB"}}
}
}
reproductive.plot0 <- reproductive.plot
reproductive.plot[!reproductive.plot$gene%in%reproductive.GLIMMIX.biased,"bias"] <- "NA"
reproductive.plot[!reproductive.plot$gene%in%reproductive.GLIMMIX.biased,"bias.plot"] <- "NA"
reproductive.plot <- rbind(reproductive.plot[reproductive.plot$bias.plot%in%c("NA"),],
reproductive.plot[reproductive.plot$bias.plot%in%c("mat", "AHB", "EHB", "pat"),])
reproductive.plot$bias.plot <- factor(reproductive.plot$bias.plot,
levels = c("NA","mat", "AHB", "EHB", "pat"))
write.csv(reproductive.plot,"reproductive_PSGE.csv",row.names=F)
g2 <- ggplot(reproductive.plot, aes(x=p1, y=p2,
color=bias.plot,alpha=0.8)) +
geom_point(size=1) + theme_classic() +
xlab(expression(paste("% A allele in ",EHB[mother],
" x ",AHB[father],sep=""))) +
ylab(expression(paste("% A allele in ",AHB[mother],
" x ",EHB[father],sep=""))) +
ggtitle("reproductive workers") +
theme(text = element_text(size=20),
plot.title = element_text(hjust = 0.5)) +
scale_color_manual(breaks = levels(reproductive.plot$bias.plot)[-c(1)],
values=c("grey90", "#039827" ,"#83756f", "#bc9a42", "#e42b37")) +
guides(alpha=F, color=F)
g2
gmid.df <- data.frame(sterile=c(length(unique(sterile.plot[sterile.plot$bias.plot=="mat","gene"])),
length(unique(sterile.plot[sterile.plot$bias.plot=="AHB","gene"])),
length(unique(sterile.plot[sterile.plot$bias.plot=="EHB","gene"])),
length(unique(sterile.plot[sterile.plot$bias.plot=="pat","gene"]))),
Bias=c("mat","AHB","EHB","pat"),
reproductive=c(length(unique(reproductive.plot[reproductive.plot$bias=="mat","gene"])),
length(unique(reproductive.plot[reproductive.plot$bias.plot=="AHB","gene"])),
length(unique(reproductive.plot[reproductive.plot$bias.plot=="EHB","gene"])),
length(unique(reproductive.plot[reproductive.plot$bias.plot=="pat","gene"]))))
allgenes <- read.csv("NCBI.csv",header=F)[,c(1)]
## Test if # of sterile biased genes is different from reproductive biased genes
mat.test <- chisq.test(data.frame(Success=c(gmid.df[1,1],gmid.df[1,3]),
Failure=c(length(allgenes)-gmid.df[1,1],
length(allgenes)-gmid.df[1,3]),
row.names=c("sterile","reproductive")))$p.value
AHB.test <- chisq.test(data.frame(Success=c(gmid.df[2,1],gmid.df[2,3]),
Failure=c(length(allgenes)-gmid.df[2,1],
length(allgenes)-gmid.df[2,3]),
row.names=c("sterile","reproductive")))$p.value
## Warning in chisq.test(data.frame(Success = c(gmid.df[2, 1], gmid.df[2, 3]), :
## Chi-squared approximation may be incorrect
EHB.test <- chisq.test(data.frame(Success=c(gmid.df[3,1],gmid.df[3,3]),
Failure=c(length(allgenes)-gmid.df[3,1],
length(allgenes)-gmid.df[3,3]),
row.names=c("sterile","reproductive")))$p.value
pat.test <- chisq.test(data.frame(Success=c(gmid.df[4,1],gmid.df[4,3]),
Failure=c(length(allgenes)-gmid.df[4,1],
length(allgenes)-gmid.df[4,3]),
row.names=c("sterile","reproductive")))$p.value
## Build table
gmid.df$`.` <- c(mat.test,AHB.test,EHB.test,pat.test)
gmid.df <- gmid.df[,c(4,1,2,3)]
nsrows <- row.names(gmid.df[gmid.df$`.`>0.05,])
gmid.df$`.` <- formatC(gmid.df$`.`, format = "e", digits = 2)
gmid.df[nsrows,"."] <- "(ns)"
gmid.df <- gmid.df[,c(2,3,4,1)]
cols <- matrix("black", nrow(gmid.df), ncol(gmid.df))
cols[1,2] <- "#039827"
cols[2,2] <- "#83756f"
cols[3,2] <- "#bc9a42"
cols[4,2] <- "#e42b37"
ccols <- matrix("white", nrow(gmid.df), ncol(gmid.df))
ccols[1,3] <- "#f4efea"
ccols[2,3] <- "#f4efea"
ccols[3,3] <- "#f4efea"
ccols[4,3] <- "#f4efea"
ccols[1,1] <- "#f4efea"
ccols[2,1] <- "#f4efea"
ccols[3,1] <- "#f4efea"
ccols[4,1] <- "#f4efea"
ccols[1,2] <- "#e4d8d1"
ccols[2,2] <- "#e4d8d1"
ccols[3,2] <- "#e4d8d1"
ccols[4,2] <- "#e4d8d1"
cfonts <- matrix("plain", nrow(gmid.df), ncol(gmid.df))
cfonts[1,2] <- "bold"
cfonts[2,2] <- "bold"
cfonts[3,2] <- "bold"
cfonts[4,2] <- "bold"
names(gmid.df) <- c("sterile","Bias","reproductive",".")
gmid.df[2,2] <- "AHB"
gmid.df[3,2] <- "EHB"
tt <- ttheme_default(core=list(fg_params = list(col = cols,
cex = 1,
fontface = cfonts),
bg_params = list(col=NA,
fill = ccols),
padding.h=unit(2, "mm")),
rowhead=list(bg_params = list(col=NA)),
colhead=list(bg_params = list(fill = c("#f4efea",
"#e4d8d1",
"#f4efea",
"white")),
fg_params = list(rot=90,
cex = 1,
col = c("black",
"black",
"black",
"white"))))
gmid <- tableGrob(gmid.df, rows = NULL, theme=tt)
plot(gmid)
fig1 <- arrangeGrob(g1, gmid, g2, widths=c(5,2.5,5))
ggsave(file="fig1.png", plot=fig1, width=15, height=6)